Exercise: Increasing Cohesion
Here is an excerpt from a Python script that processes a CSV file:
# A parsed CSV row: column name -> cell value (as produced by csv.DictReader).
Record = dict[str, Any]
def main() -> None:
    """Read data.csv, flag each row as active/inactive, and write processed.csv.

    Deliberately monolithic: this is the "before" version used by the
    exercise to illustrate low cohesion (read, transform, and write all
    happen in one function).
    """
    with open("data.csv") as source:
        rows: list[Record] = list(csv.DictReader(source))

    # Copy every row and derive the boolean flag from its status column.
    flagged: list[Record] = [
        {**row, "is_active": row["status"] == "active"} for row in rows
    ]

    with open("processed.csv", "w") as target:
        writer = csv.DictWriter(target, fieldnames=["name", "status", "is_active"])
        writer.writeheader()
        writer.writerows(flagged)
# NOTE(review): this is a verbatim duplicate of the preceding main()
# definition — most likely a paste/rendering artifact of the exercise text.
# In an actual module this second definition would silently replace the first.
def main() -> None:
    # Read every row of the input CSV into a list of dicts.
    with open("data.csv") as f:
        reader = csv.DictReader(f)
        data: list[Record] = list(reader)
    processed_data: list[Record] = []
    for row in data:
        # Work on a copy so the originally parsed row stays untouched.
        row_copy = row.copy()
        if row_copy["status"] == "active":
            row_copy["is_active"] = True
        else:
            row_copy["is_active"] = False
        processed_data.append(row_copy)
    # Write the augmented rows, adding the derived "is_active" column.
    with open("processed.csv", "w") as f:
        writer = csv.DictWriter(f, fieldnames=["name", "status", "is_active"])
        writer.writeheader()
        writer.writerows(processed_data)
Why is this code not very cohesive? How can you refactor it to increase cohesion?
Compatible Python Versions: 3.9+
Hello, ArjanCodes Team!
What do you think about this approach?
from pathlib import Path
from typing import List
from dataclasses import dataclass, field
import csv
# Resolve paths relative to this script so it works from any working directory.
BASE_DIR = Path(__file__).resolve().parent
CSV_PATH = BASE_DIR / "data.csv"  # input file
PROCESSED_CSV_PATH = BASE_DIR / "processed.csv"  # output file
@dataclass
class Record:
    """A single CSV row with a derived active flag."""

    name: str
    status: str
    # Derived from `status` in __post_init__, so excluded from __init__.
    is_active: bool = field(init=False)

    def __post_init__(self) -> None:
        # Idiom: assign the comparison result directly instead of an
        # if/else that sets True/False by hand.
        self.is_active = self.status == "active"
class DataHandler:
    """Reads raw records from one CSV file and writes processed ones to another."""

    def __init__(self, csv_path: Path, processed_csv_path: Path):
        self.csv_path = csv_path
        self.processed_csv_path = processed_csv_path

    def read_csv(self) -> List[Record]:
        """Parse every row of the input CSV into a Record instance."""
        records: List[Record] = []
        with open(self.csv_path) as f:
            for row in csv.DictReader(f):
                records.append(Record(name=row['name'], status=row['status']))
        return records

    def write_csv(self, data: List[Record]) -> None:
        """Dump the records (including the derived is_active flag) to the output CSV."""
        rows = [record.__dict__ for record in data]
        with open(self.processed_csv_path, "w") as f:
            writer = csv.DictWriter(f, fieldnames=["name", "status", "is_active"])
            writer.writeheader()
            writer.writerows(rows)
def main() -> None:
    """Process CSV_PATH into PROCESSED_CSV_PATH and echo the result."""
    handler = DataHandler(CSV_PATH, PROCESSED_CSV_PATH)
    handler.write_csv(handler.read_csv())

    # Check results: re-read the output file and show what was written.
    with open(PROCESSED_CSV_PATH) as f:
        processed_data = list(csv.DictReader(f))
    print(f"Data after writing: {processed_data}")


if __name__ == "__main__":
    main()
Nice solution, Kostiantyn! I would argue that this solution has increased cohesion, but I think it could be improved.
* The idea of having a `DataHandler` class with methods is, in my view, not necessary, since it holds hardly any state beyond the two file paths. In this case, these methods could have been separate functions.
* Which Python version are you using? If you are using Python 3.9 or higher, you do not need to import `List` from `typing`; you can use the built-in generic `list` instead.

Yes, you are correct. I already went through the next lessons and saw where the excessive part was.
I still use Python 3.11.
Cool!
Yeah, that is what I imagined! :)
from typing import Any

# Fix: `Record` was referenced in the annotations below but never defined
# in this snippet (as the review points out). It is a raw CSV row as
# produced by csv.DictReader: column name -> cell value.
Record = dict[str, Any]


def read_csv(file_path: str) -> list[Record]:
    """Read all rows of the CSV file at *file_path* into a list of dicts."""
    with open(file_path) as f:
        reader = csv.DictReader(f)
        return list(reader)
from collections.abc import Sequence


def save_csv(
    file_path: str,
    # Forward-reference string: Record is defined elsewhere in the file.
    data: "list[Record]",
    # Fix: the annotation said list[str] while the default is a tuple.
    # An immutable tuple is deliberately kept as the default — a sequence
    # default is only safe when it cannot be mutated between calls — and
    # the annotation now admits both lists and tuples.
    fieldnames: Sequence[str] = ("name", "status", "is_active"),
) -> None:
    """Write *data* to *file_path* with a header row of *fieldnames*."""
    with open(file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
def process_data(data: list[Record]) -> list[Record]:
    """Return a copy of *data* where every row gains an "is_active" flag."""
    result: list[Record] = []
    for record in data:
        # Shallow-copy so the caller's rows are never mutated.
        updated = record.copy()
        updated["is_active"] = updated["status"] == "active"
        result.append(updated)
    return result
def main_after() -> None:
    """Run the read -> process -> save pipeline (the cohesive version)."""
    raw_rows = read_csv("data.csv")
    flagged_rows = process_data(raw_rows)
    save_csv("processed.csv", flagged_rows)


if __name__ == "__main__":
    # main_before()
    main_after()
Nice start! I do have some questions about the solution.
It seems like there should be a type or something similar for `Record`, but I can't seem to find it. Is it defined above?

The type annotation for `save_csv` is incorrect for the `fieldnames` argument: the expected type is a list, but the default value is a tuple of values. I would also be a bit careful with using collections as default arguments, because if they are mutable you can get some unexpected behavior. In this case, using a tuple is okay because it is immutable. But, as a rule of thumb, try not to use collections as default values.

import csv
from typing import Any

# A CSV row as produced by csv.DictReader: column name -> raw string value.
Record = dict[str, Any]


def get_data_from_csv(csv_file_name: str) -> list[Record]:
    """Load every row of *csv_file_name* into a list of dicts."""
    with open(csv_file_name) as csv_file:
        rows: list[Record] = list(csv.DictReader(csv_file))
    return rows
# NOTE: annotations below are string forward references so these functions
# do not depend on Record being defined before them at import time.
def process_row(row: "Record") -> "Record":
    """Return a copy of *row* with a derived boolean "is_active" column."""
    row_copy = row.copy()
    # Idiom: the comparison already yields the bool; no if/else needed.
    row_copy["is_active"] = row_copy["status"] == "active"
    return row_copy


def process_data(data: "list[Record]") -> "list[Record]":
    """Run every row through process_row and return the new list.

    Bug fix: the original version filled `processed_data` but never
    returned it, so callers always received None.
    """
    processed_data = [process_row(row) for row in data]
    return processed_data
def write_data_to_csv(data: "list[Record]", csv_file_name: str, fieldnames: list[str]) -> None:
    """Write *data* to *csv_file_name* with *fieldnames* as the header row.

    Bug fixes: the file is now opened in write mode ("w") — the original
    opened it read-only, so csv.DictWriter could not write at all — and
    the misspelled parameter `filednames` is renamed to `fieldnames`
    (the only visible caller passes it positionally).
    """
    with open(csv_file_name, "w") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
def main() -> None:
    """Read, process, and write the CSV end to end."""
    data = get_data_from_csv("data.csv")
    processed_data = process_data(data)
    # Bug fix: the last column must be spelled "is_active" — process_row
    # adds a key named "is_active", and csv.DictWriter raises ValueError
    # when a row contains a key that is not listed in fieldnames.
    write_data_to_csv(processed_data, "processed.csv", ["name", "status", "is_active"])


if __name__ == "__main__":
    main()
Looks good! Nice work Alexandre
Hey Arjan and team,
Here is my solution:
# A raw CSV row keyed by column name.
Record = dict[str, Any]


def csv_file_reader(file_path) -> list[Record]:
    """Read and return all rows of the CSV file at *file_path*."""
    with open(file_path) as f:
        rows: list[Record] = list(csv.DictReader(f))
    return rows
def set_active_status(status: str) -> bool:
    """Return True when *status* is the literal "active".

    Idiom: the comparison already yields a bool, so the original
    if/else returning True/False is unnecessary.
    """
    return status == "active"
def csv_file_writer(
    file_path: str,
    # Fixes: the original annotations claimed plain lists while the
    # defaults were None (None defaults are the right way to avoid mutable
    # default arguments, but the annotations must admit them). The
    # annotations are string forward references since Record is defined
    # elsewhere in the file.
    data: "list[Record] | None" = None,
    fieldnames: "list[str] | None" = None,
) -> None:
    """Write *data* to a CSV file at *file_path*.

    Bug fix: a None *data* no longer crashes writer.writerows — both
    defaults now fall back to empty lists before writing.
    """
    if data is None:
        data = []
    if fieldnames is None:
        fieldnames = []
    with open(file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)
def process_data(data: list[Record]) -> list[Record]:
    """Process data: return copies of all records with an "is_active" flag added."""
    return [
        # {**record, ...} makes the same shallow copy as record.copy().
        {**record, "is_active": set_active_status(record["status"])}
        for record in data
    ]
def main() -> None:
    """Wire the reader, processor, and writer together."""
    file_read_path = "data.csv"
    file_write_path = "processed_data.csv"
    fieldnames = ["name", "status", "is_active"]

    # read -> process -> write, one step per line.
    raw_records = csv_file_reader(file_path=file_read_path)
    flagged_records = process_data(raw_records)
    csv_file_writer(file_path=file_write_path, data=flagged_records, fieldnames=fieldnames)
Nice job with the cohesion! There are however some improvements that can be made:
* `set_active_status` can be shortened or even removed: the conditional logic `status == "active"` does not need to be wrapped in an if-statement, because `return status == "active"` is the same as the if/else version.
* Is there a reason why you copy each row instead of making a copy of the full data list in `process_data`? In my view, it is a bit cleaner to do all copying and alteration of parameters as early as possible. In this case, make a copy of the data so you do not need to think about it anymore.

Thanks
No worries! If you have any questions or discussion points, please write here or on the discord! :)
import csv
from enum import Enum
from typing import Any
# A raw CSV row keyed by column name.
Record = dict[str, Any]


class RecordStatus(Enum):
    """Closed set of legal values for a record's "status" column."""

    ACTIVE = "active"
    INACTIVE = "inactive"
def read_from_csv_file(file_path: str) -> list[Record]:
    """Parse every row of *file_path* into a dict keyed by column name."""
    with open(file_path) as f:
        rows: list[Record] = list(csv.DictReader(f))
    return rows
def record_is_active(record: Record) -> bool:
    """True iff the record's status column equals RecordStatus.ACTIVE."""
    status = record["status"]
    return status == RecordStatus.ACTIVE.value
def process_data(data: list[Record]) -> list[Record]:
    """Return copies of all records, each with a derived "is_active" flag."""
    processed: list[Record] = []
    for record in data:
        # Copy first so the caller's row is never mutated.
        enriched = record.copy()
        enriched["is_active"] = record_is_active(enriched)
        processed.append(enriched)
    return processed
def write_to_csv_file(file_path: str, data: list[Record]) -> None:
    """Write the processed records, including the derived flag column."""
    columns = ["name", "status", "is_active"]
    with open(file_path, "w") as f:
        writer = csv.DictWriter(f, fieldnames=columns)
        writer.writeheader()
        writer.writerows(data)
def main() -> None:
    """Read data.csv, derive the active flag, and write processed.csv."""
    records = read_from_csv_file("data.csv")
    write_to_csv_file("processed.csv", process_data(records))


if __name__ == "__main__":
    main()
Nice job, Scott! It is a nice addition that you defined a custom `Record` type and a `RecordStatus` enum for the status values.